package com.amaake;

import com.amaake.Model.Zolurl;
import com.amaake.Model.Zolxx;
import com.jfinal.plugin.activerecord.Db;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * \* Created with IntelliJ IDEA.
 * \* User: Amaake
 * \* Date: 2016/10/12 0012
 * \* Time: 20:23
 * \* Description: 春眠不觉晓，起来敲代码。
 * \爬取主要数据内容
 */
public class ZolpcPageProcesser implements PageProcessor {

    private static Logger log = Logger.getLogger(ZolpcPageProcesser.class);

    public void process(Page page) {

        String url = page.getUrl().toString();
        log.info("url为"+url+"开始爬取------");
        Zolurl zolurl = Zolurl.dao.findFirst("select * from zolurl where url='"+url+"'");
        log.info("id为"+zolurl.getId()+"------title为"+zolurl.getTitle());
        List<Zolxx> zolxxList = Zolxx.dao.find("select * from zolxx where id="+zolurl.getId());
        if(zolxxList.size()>0){
            log.info("清理原有数据，然后更新数据");
            Db.update("DELETE FROM zolxx WHERE id="+zolurl.getId());
        }
        List<Selectable> list =  page.getHtml().xpath("//ul[@class='category-param-list']").nodes();
        Zolxx zolxx = null;
        for (Selectable ul :list){
            List<Selectable> lilist = ul.xpath("//li").nodes();
            for(Selectable li : lilist){
                log.info("html内容为"+li+"------");
                String newPmName_ = li.regex("newPmName_\\d+").toString();
                String newPmVal_ = li.regex("newPmVal_\\d+").toString();
                log.info("标题ID为"+newPmName_+"------");
                log.info("内容ID为"+newPmVal_+"------");
                String newPmName = li.xpath("//span[@id='"+newPmName_+"']/text()").toString();
                String newPmVal = li.xpath("//span[@id='"+newPmVal_+"']/text()").toString();
                if(newPmVal.equals("")){
                    newPmVal = li.xpath("//span[@id='"+newPmVal_+"']/a/text()").toString();
                }
                log.info("标题为"+newPmName+"------");
                log.info("内容为"+newPmVal+"------");
                log.info("开始存入数据库");
                zolxx = new Zolxx();
                zolxx.setId(zolurl.getId());
                zolxx.setNewPmName(newPmName);
                zolxx.setNewPmVal(newPmVal);
                zolxx.save();
            }
        }
    }



    public Site getSite() {
        return Site.me().setRetryTimes(3).setSleepTime(1000);
    }
}